/* 
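    Purpose: Using the 1960 Census (5% sample), this file locates black and white women aged 
             30-50 who are mothers of a child younger than 18 in the same household. The sample 
             is further restricted to mothers who are heads of household and who have non-zero, 
             non-missing individual, family, and household income. Other variables necessary to 
             create average predicted mother income (in 3d) are also cleaned.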

    Note: Income was asked of all individuals 14+ in the 1960 Census, 
          so there's no need to use a sample line weight (or any weight). 

    Creates: Census1960_mothers_ages30to50.dta
*/
clear
set more off
cd "$Mydirectory1/1_DataSources/CensusData/"

* Load the raw 1960 5% extract (download from IPUMS USA)
use ./input/Census1960_5pct_raw.dta, clear
    * Inspect the person weight. Confirmed: everyone receives a weight of 1.
    tabulate perwt
            
*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

***************************
** SET UP INCOME VARIABLES
***************************

* Recode the income variables: 9999999 becomes missing, negative amounts become zero.
* Applied to individual income first, then to the Census family income (v1).
    foreach incvar in inctot ftotinc {
        replace `incvar' = . if `incvar' == 9999999
        replace `incvar' = 0 if `incvar' < 0
    }

* Family income (v2): sum of the individual incomes of the members of each family unit
    bysort serial famunit: egen fam_income = total(inctot)

* Count cases where v1 is missing but v2 is positive
    * Note: discrepancies appear to come mostly from individuals living in group quarters.
    count if fam_income > 0 & ftotinc == .

* Harmonize v1 and v2: fill a missing v1 with a positive v2
    replace ftotinc = fam_income if fam_income > 0 & ftotinc == .

* Household income

    * Flag one member per family unit (the first one when ordered by pernum)
    bysort serial famunit (pernum): generate fam_head = (_n == 1)

    * Carry that member's family income; everyone else, and any missing value, contributes 0
    generate temp = cond(fam_head == 1 & ftotinc != ., ftotinc, 0)

    * Add up the "separate" families within a serial to get household income (i.e. income by serial number)
    bysort serial: egen hh_income = total(temp)
    drop temp fam_head fam_income

    * From here on, fam_income refers to the harmonized Census family income
    rename ftotinc fam_income
            
/*
   Express all income variables in 1950 dollars using the CPI.
   Source: https://www.minneapolisfed.org/about-us/monetary-policy/inflation-calculator/consumer-price-index-1913-
*/
    generate CPI1960 = 29.6
    generate CPI1950 = 24.1

    * Deflate each income measure by the ratio of the 1950 CPI to the 1960 CPI
    foreach incvar in inctot fam_income hh_income {
        replace `incvar' = (CPI1950 / CPI1960) * `incvar'
    }

* NOTE: the list of mothers is built from ALL persons, before the income
*       restriction is applied. Income was only asked of individuals 14+, so
*       children under 14 have no individual income and would be removed by the
*       income restriction. Building the list afterwards would identify mothers
*       only through 14-17 year-old children with non-zero income of their own.

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

***************************
** IDENTIFY MOTHERS
***************************

* Build the list of mother ids (serial + pernum) from children younger than 18
    preserve
        keep if age<18 //Restrict to children younger than 18
        keep serial momloc

        replace momloc=. if momloc==0
        drop if momloc==. //Exclude children without a mother in the house

        //Keep all unique mother ids. Some mothers will have multiple children in the Census.
        bysort serial momloc: keep if _n==1
        rename momloc pernum //so the list can be matched to the mother's own person record

        tempfile children
        save `children'
    restore

* Keep respondents with non-zero and non-missing income
*   (applied to the person records that remain candidates for the mother sample)
    foreach var of varlist inctot fam_income hh_income {
        drop if `var'==0 | `var'==.
    }

* Keep the sample of mothers: person records that appear in the list of mother ids
    merge 1:1 serial pernum using `children'
    keep if _merge==3
    drop _merge

** Restrict the sample to mothers who are heads of household
    tabulate relate
    drop if relate != 1
                     
*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

***************************
** CREATE OTHER NECESSARY VARIABLES
***************************

* Restrict to black and white mothers ages 30 to 50
    keep if inrange(age, 30, 50)
    keep if inlist(race, 1, 2)

* Region of current residence, collapsed to four groups
    * Northeast: Connecticut, Maine, Massachusetts, New Hampshire, Rhode Island, Vermont, New Jersey, New York, Pennsylvania
    * Midwest: Illinois, Indiana, Michigan, Ohio, Wisconsin, Iowa, Kansas, Minnesota, Missouri, Nebraska, North Dakota, South Dakota
    * South: Delaware, District of Columbia, Florida, Georgia, Maryland, North Carolina, South Carolina, Virginia, West Virginia, Alabama,
    *        Kentucky, Mississippi, Tennessee, Arkansas, Louisiana, Oklahoma, Texas
    * West: Arizona, Colorado, Idaho, Montana, Nevada, New Mexico, Utah, Wyoming, California, Oregon, Washington --note: Census puts AK and HI in with Pacific division

    generate region_merge = .
    replace region_merge = 1 if inlist(region, 11, 12) //Northeast
    replace region_merge = 2 if inlist(region, 21, 22) //Midwest
    replace region_merge = 3 if region >= 31 & region <= 33 //South
    replace region_merge = 4 if inlist(region, 41, 42) //West

    * Check the recode against the source variable, including missing values
    tabulate region, missing
    tabulate region_merge, missing

    label define region_l 1 "NORTHEAST" 2 "MIDWEST" 3 "SOUTH" 4 "WEST"
    label values region_merge region_l
    tabulate region_merge, missing

    * Indicator for the South (0 for every other value of region_merge, including missing)
    generate south_merge = (region_merge == 3)
    
* Education variable: five categories built from the detailed education code (educd)
    gen edu=.
    replace edu=1 if educd<=25 //<grade school (includes people with no schooling)
    replace edu=2 if educd==26 //8th grade
    replace edu=3 if inlist(educd,30,40,50,61) //<hs
    replace edu=4 if inlist(educd,60,62,63,64) //hs
    * The upper bound must be tested on educd, the variable that carries the "999"
    * missing code, so that records with missing education stay missing in edu.
    replace edu=5 if educd>64 & educd<999 //>hs. "999" is missing
    tab edu, m
    
        
*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

****************
**** ASSIGN COARSENED OCCS
****************

* Set up variables: occupation codes of 980 and above are set to missing
    sort occ1950
    replace occ1950 = . if occ1950 >= 980

* Count # of Census occupations in 1960 data (flag the first record of each code)
    bysort occ1950: generate nvals = (_n == 1)
    count if nvals == 1 //223

* Separate people with occupations in 200's based on self-employment:
* codes 200-290 with classwkr equal to 1 are shifted up by 1000
    replace occ1950 = occ1950 + 1000 if inrange(occ1950, 200, 290) & classwkr == 1

* Crosswalk Census occupations to coarsened ANES occupations
    merge m:1 occ1950 using ../Crosswalks/Crosswalk_1950Census_toANES.dta
    assert _merge != 1 //every Census record must find a match in the crosswalk
    drop if _merge == 2 //crosswalk rows with no Census record
    drop _merge

    * List, then drop, the records that did not receive a coarsened occupation
    tabulate occ1950 if occ1950ej == ., missing
    drop if occ1950ej == .
            
*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

****************
**** SAVE 
****************

* Keep relevant variables
    rename occ1950ej motheroccej

    keep race south_merge motheroccej inctot fam_income hh_income perwt

    * Marker identifying these observations as coming from the Census
    generate census = 1
    label variable census "Census obs"

    * Log of each income measure, plus labels for the level and the log
    foreach incvar in inctot fam_income hh_income {
        generate log_mother_`incvar' = ln(`incvar')
        label variable log_mother_`incvar' "Log `incvar', Census"
        label variable `incvar' "`incvar', Census"
    }

    * Final names for the family and household income levels
    rename fam_income faminc
    rename hh_income HHinc

    compress
    save ./output/Census1960_mothers_ages30to50.dta, replace
                    
    
